Python

python截取长网页 并发送邮件

2019/11/15 12:14 36006 次阅读王梓
★ 打赏
✸ ✸ ✸

使用 Python 截取完整的长网页,将截图保存至本地,然后通过邮件发送。

准备工作:安装依赖

yum install chromedriver

yum install https://dl.google.com/linux/direct/google-chrome-stable_current_x86_64.rpm

pip install selenium

核心代码

#!/bin/python
# coding=utf-8
# author: wz
# mail: 277215243@qq.com
# datetime:2019/10/15 12:42 PM
# web: https://www.bthlt.com

from selenium import webdriver
import time
import os.path
import multiprocessing as mp
from selenium.webdriver.chrome.options import Options
from email import encoders
from email.mime.base import MIMEBase
from email.header import Header
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.image import MIMEImage

# Mail settings used by send_mail().
# The SMTP host, login and password can be overridden through the
# MAIL_HOST / MAIL_USER / MAIL_PASS environment variables so that real
# credentials do not have to be committed to the source file; the literals
# below are only placeholders / defaults.
mailto_list = ['277215243@qq.com']  # recipients of the screenshot mail
mail_host = os.environ.get('MAIL_HOST', 'smtp.163.com')  # SMTP server
mail_user = os.environ.get('MAIL_USER', '******@163.com')  # SMTP login
mail_pass = os.environ.get('MAIL_PASS', '******')  # SMTP password / auth code
mail_postfix = '163.com'  # domain appended to a login that has no '@'

def _scroll_to_bottom(driver, step=800, pause=0.2):
    """Scroll the page down in ``step``-pixel increments until its end.

    The body height is re-read after every scroll, so the loop keeps going
    if the page grows while it is being scrolled.
    """
    js_height = "return document.body.clientHeight"
    height = driver.execute_script(js_height)
    k = 1
    while k * step < height:
        js_move = "window.scrollTo(0,{})".format(k * step)
        print(js_move)
        driver.execute_script(js_move)
        time.sleep(pause)
        height = driver.execute_script(js_height)
        k += 1


def webshot(link='https://www.bthlt.com/', picname='tbc.png',
            save_dir='/data/www/'):
    """Take a full-page screenshot of ``link`` with headless Chrome.

    Args:
        link: URL of the page to capture.
        picname: File name of the PNG to write.
        save_dir: Directory the PNG is written into.

    Returns:
        True if the screenshot file was written, False otherwise. Errors
        raised while loading or capturing the page are printed, not raised.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('lang=zh_CN.UTF-8')
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--no-sandbox')
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.maximize_window()
        try:
            driver.get(link)
            _scroll_to_bottom(driver)
            # Resize the window to the full document size so that a single
            # screenshot covers the whole page.
            scroll_width = driver.execute_script(
                'return document.body.parentNode.scrollWidth')
            scroll_height = driver.execute_script(
                'return document.body.parentNode.scrollHeight')
            driver.set_window_size(scroll_width, scroll_height)
            # get_screenshot_as_file reports failure through its return
            # value rather than an exception, so check it explicitly.
            saved = driver.get_screenshot_as_file(
                os.path.join(save_dir, picname))
            if not saved:
                print(picname, 'screenshot could not be saved')
                return False
            print("Process {} get one pic !!!".format(os.getpid()))
            time.sleep(3)
            return True
        except Exception as e:
            print(picname, e)
            return False
    finally:
        # Always shut the browser down; otherwise every run leaves a
        # headless Chrome / chromedriver process behind.
        driver.quit()

def send_mail(to_list, sub, image_path='/data/www/tbc.png'):
    """Send an HTML mail with the screenshot embedded as image ``cid:0``.

    Args:
        to_list: List of recipient addresses.
        sub: Subject line of the mail.
        image_path: Path of the PNG file to attach and embed.

    Returns:
        True if the mail was handed to the SMTP server, False if connecting,
        logging in or sending raised (the error is printed).

    Raises:
        IOError/OSError: if ``image_path`` cannot be read.
    """
    from email.utils import formataddr

    # mail_user may already be a full address; only append the domain when
    # it is a bare account name, otherwise the result is "a@b.com@b.com".
    if '@' in mail_user:
        sender_addr = mail_user
    else:
        sender_addr = mail_user + '@' + mail_postfix
    # RFC 2047-encode the non-ASCII display name for the From header.
    me = formataddr((Header('葫芦', 'utf-8').encode(), sender_addr))

    msg = MIMEMultipart()
    msg['Subject'] = sub
    msg['From'] = me
    # Address lists in headers are comma separated (RFC 5322), not ';'.
    msg['To'] = ', '.join(to_list)
    body = """
<html lang="en">
    <body>
    <h1>脚本网页截图</h1>
    <hr />
    <br />
    <a href="https://www.bthlt.com">葫芦的运维日志</a>
    <br />
    <img src="cid:0" />
    </body>
    </html>
"""
    msg.attach(MIMEText(body, 'html', 'utf-8'))

    image_name = os.path.basename(image_path)
    with open(image_path, 'rb') as f:
        mime = MIMEBase('image', 'png', filename=image_name)
        mime.add_header('Content-Disposition', 'attachment',
                        filename=image_name)
        # The Content-ID is what the <img src="cid:0"> in the body refers to.
        mime.add_header('Content-ID', '<0>')
        mime.add_header('X-Attachment-Id', '0')
        mime.set_payload(f.read())
        encoders.encode_base64(mime)
        msg.attach(mime)

    try:
        server = smtplib.SMTP()
        server.connect(mail_host)
        try:
            server.login(mail_user, mail_pass)
            # The envelope sender is the bare address, without display name.
            server.sendmail(sender_addr, to_list, msg.as_string())
            server.quit()
        finally:
            # Release the socket even when login or sending failed.
            server.close()
        return True
    except Exception as e:
        print(e)
        return False

if __name__ == '__main__':
    # Script entry point: capture the page, mail the screenshot, and report
    # the outcome together with the total elapsed time.
    started_at = time.time()
    webshot()
    sent = send_mail(mailto_list, '脚本网页截图 并发送邮件')
    print('发送成功' if sent else '发送失败')
    elapsed = float(time.time() - started_at)
    print("操作结束,耗时:{:.2f}秒".format(elapsed))

-rw-r--r-- 1 root root 2.5M Nov 15 12:46 /data/www/tbc.png

结果

待解决

截图中的中文显示为乱码,该问题当时未能解决。

补充已解决

将本地的中文字体文件上传至服务器的 /usr/share/fonts 目录后,截图中的中文乱码问题即可解决。

✸ ✸ ✸

📜 版权声明

本文作者:王梓 | 原文链接:https://www.bthlt.com/note/12778485-Pythonpython截取长网页 并发送邮件

出处:葫芦的运维日志 | 转载请注明出处并保留原文链接

📜 留言板

留言提交后需管理员审核通过才会显示